# -*- coding: utf-8 -*-
import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
def rgbImg(img):
    """Convert a BGR image to RGB for matplotlib display.

    Returns the input unchanged when conversion is not possible
    (e.g. the image is already single-channel), instead of raising.
    """
    try:
        return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    except cv2.error:
        # cvtColor raises cv2.error for incompatible channel counts;
        # the original bare `except:` hid every other bug as well.
        return img
def grayImg(img):
    """Return the grayscale conversion of a BGR image."""
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return gray
def getImg(filename):
    """Read an image from disk with cv2.imread.

    Raises:
        RuntimeError: if the file cannot be read. cv2.imread signals
            failure by returning None rather than raising.
    """
    img = cv2.imread(filename)
    # BUGFIX: `img == None` on a successfully-read ndarray produces an
    # elementwise boolean array, so the `if` raised ValueError on every
    # *successful* read. Identity comparison is the correct None check.
    if img is None:
        raise RuntimeError('Can not read img from the path.')
    return img
def normalize(img):
    """Min-max scale `img` into the 0-255 range and return the result.

    The input image is left untouched; scaled values go into a copy.
    """
    scaled = img.copy()
    cv2.normalize(img, scaled, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
    return scaled
def find_text_area(img):
    """Locate text regions in a BGR image and return them as cropped sub-images.

    Pipeline: grayscale -> median blur -> inverse binary threshold ->
    erode -> outline of all non-zero pixels -> erode/dilate with a 5x7
    kernel (morphological opening) -> findContours -> crop each
    contour's min-area bounding box out of the original image.

    NOTE(review): the 3-value return of cv2.findContours is the
    OpenCV 3.x API; 2.x and 4.x return 2 values.
    """
    texts = []
    gray = grayImg(img)
    # Median blur suppresses speckle noise before thresholding.
    bl = cv2.medianBlur(gray, 3)
    # Dark text pixels (intensity < 20) become white foreground.
    ret, th = cv2.threshold(bl,20,255, cv2.THRESH_BINARY_INV)
    # NOTE(review): cv2.MORPH_ERODE (an operation flag, value 0) happens
    # to equal cv2.MORPH_RECT (a shape flag, also 0), which is what
    # getStructuringElement expects — works by coincidence; confirm.
    kernel = cv2.getStructuringElement(cv2.MORPH_ERODE,(3, 3))
    # NOTE(review): the third positional arg of cv2.erode/dilate is
    # `dst`, not `iterations` — confirm the `1` here is intended.
    er1 = cv2.erode(th, kernel, 1)
    # Draw the outline of every remaining foreground pixel, treated as
    # one big contour, onto a blank canvas.
    zero_cnts = cv2.findNonZero(er1)
    window = np.zeros(th.shape, dtype = np.uint8)
    cv2.drawContours(window, [zero_cnts], 0, (255,255,255), 1)
    # Erode then dilate with the same 5x7 kernel (an opening) to merge
    # nearby character strokes into solid text blobs.
    er_kernel = cv2.getStructuringElement(cv2.MORPH_ERODE,(5, 7))
    er2 = cv2.erode(window, er_kernel, 1)
    di_kernel = cv2.getStructuringElement(cv2.MORPH_DILATE,(5, 7))
    di1 = cv2.dilate(er2, di_kernel, 1)
    image, cnts2, hierarchy= cv2.findContours(di1, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    for i, c in enumerate(cnts2):
        min_rect = cv2.minAreaRect(c)
        box = np.int0(cv2.boxPoints(min_rect))
        # The top-left corner minimizes x+y; the bottom-right maximizes it.
        rec_sum = box.sum(axis=1)
        point1 = box[np.argmin(rec_sum)]
        point2 = box[np.argmax(rec_sum)]
        # Crop the region (skipping the first row) from the original image.
        text_img = img[point1[1]+1:point2[1], point1[0]:point2[0]]
        texts.append(text_img)
    return texts
def seg_num(text, size=(20, 20)):
    """Slice a coordinate-text strip into fixed-width character images.

    Fixed pixel-column layout of the overlay font:
      [0:100]    digits left of the decimal point (12 px per digit)
      [100:106]  the decimal point (6 px)
      [106:143]  digits right of the decimal point (12 px per digit)
      [143:]     the trailing unit label ('um')

    Each slice is resized to `size` and collected in order: unit label,
    right-hand digits (right to left), decimal point, left-hand digits
    (right to left). A leftmost slice narrower than 12 px is discarded
    by the integer division in the loop bound.

    `size` defaults to (20, 20), the classifier input size used by the
    feature-extraction code below — callers that omitted it previously
    raised TypeError.
    """
    seg_list = []
    um = text[:,143:,:]
    num_right = text[:,106:143,:]
    dot = text[:,100:106,:]
    num_left = text[:,:100,:]
    seg_list.append(resize(um, size))
    # Walk the right-hand digits from the decimal point outward.
    right_length = 143-106
    for i in xrange(0, (143-106)/12):
        seg_img = num_right[:,right_length-12:right_length,:]
        seg_list.append(resize(seg_img, size))
        right_length = right_length - 12
    seg_list.append(resize(dot, size))
    # Walk the left-hand digits from the decimal point outward.
    left_length = 100
    for i in xrange(0, 100/12):
        seg_img = num_left[:,left_length-12:left_length,:]
        seg_list.append(resize(seg_img, size))
        left_length = left_length - 12
    return seg_list
def resize(img, size):
    """Resize `img` to `size` (width, height) with bilinear interpolation.

    Raises:
        TypeError: if img is not a numpy.ndarray or size is not a tuple.
        ValueError: if size does not contain exactly two elements.
    """
    # Guard clauses instead of an if/elif chain; `len(size) != 2`
    # replaces the redundant `> 2 or < 2` pair.
    if not isinstance(img, np.ndarray):
        raise TypeError('Input image type is not numpy.ndarray.')
    if not isinstance(size, tuple):
        raise TypeError('Input size type is not tuple.')
    if len(size) != 2:
        raise ValueError('Input size is invalid.')
    return cv2.resize(img, size, interpolation=cv2.INTER_LINEAR)
# Experiment area: processing flow for recognizing the text regions
# -*- coding: utf-8 -*-
# Filenames on this machine are Big5-encoded (Python 2 byte strings).
img = getImg('0201-S2-維修-殘膠-座標'.decode('utf8').encode('big5'))
gray = grayImg(img)
print 'Gray-scale input:'
plt.imshow(gray,'gray')
plt.show()
gcopy = gray.copy()
# Median blur to remove speckle noise before thresholding.
bl = cv2.medianBlur(gcopy, 3)
print 'Median blur:'
plt.imshow(bl,'gray')
plt.show()
# Dark text pixels (< 20) become white foreground.
ret, th = cv2.threshold(bl,20,255, cv2.THRESH_BINARY_INV)
print 'Treshhold of binary inv:'
plt.imshow(th,'gray')
plt.show()
kernel = cv2.getStructuringElement(cv2.MORPH_ERODE,(3, 3))
er1 = cv2.erode(th, kernel, 1)
print 'Erode with 3*3 kernel'
plt.imshow(er1, 'gray')
plt.show()
# Outline of every remaining non-zero pixel, drawn on a blank canvas.
zero_cnts = cv2.findNonZero(er1)
window = np.zeros(th.shape, dtype = np.uint8)
cv2.drawContours(window, [zero_cnts], 0, (255,255,255), 1)
print 'Find contours of non-zero area :'
plt.imshow(window, 'gray')
plt.show()
# Erode then dilate with the same 5x7 kernel (a morphological opening)
# to merge character strokes into solid text blobs.
er_kernel = cv2.getStructuringElement(cv2.MORPH_ERODE,(5, 7))
er2 = cv2.erode(window, er_kernel, 1)
print 'Erode with 5*7 kernel:'
plt.imshow(er2, 'gray')
plt.show()
di_kernel = cv2.getStructuringElement(cv2.MORPH_DILATE,(5, 7))
di1 = cv2.dilate(er2, di_kernel, 1)
print 'Dilate with 5*7 kernel:'
plt.imshow(di1, 'gray')
plt.show()
copy = img.copy()
texts = []
# NOTE(review): 3-value return is the OpenCV 3.x findContours API.
image, cnts2, hierarchy= cv2.findContours(di1, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
for i, c in enumerate(cnts2):
    min_rect = cv2.minAreaRect(c)
    box = np.int0(cv2.boxPoints(min_rect))
    # Top-left corner minimizes x+y; bottom-right maximizes it.
    rec_sum = box.sum(axis=1)
    point1 = box[np.argmin(rec_sum)]
    point2 = box[np.argmax(rec_sum)]
    # Outline the detected region on the display copy.
    rect_frame = cv2.rectangle(copy, tuple(point1), tuple(point2), (0,255,0), 1)
    # Filled rectangle used as a bitwise mask over the original image.
    window = np.zeros(img.shape, dtype = np.uint8)
    mask = cv2.rectangle(window, tuple(point1), tuple(point2), (255,255,255), -1)
    print 'Create the text-mask by contours:'
    plt.imshow(mask,'gray')
    plt.show()
    text = cv2.bitwise_and(img, mask)
    texts.append(text)
    print 'Find the intersection_{} of input and the mask_{}:'.format(i,i)
    plt.imshow(rgbImg(text))
    plt.show()
    # Alternative: crop the region directly instead of masking.
    text_img = img[point1[1]+1:point2[1], point1[0]:point2[0]]
    plt.imshow(rgbImg(text_img))
    plt.show()
print 'Find the texts:'
plt.imshow(rgbImg(copy))
plt.show()
# Experiment area: segmenting the digits
img = getImg('0064-S0-維修-殘膠-座標'.decode('utf8').encode('big5'))
plt.imshow(rgbImg(img))
plt.show()
texts = find_text_area(img)
text = texts[0]
# The 'um' unit label always starts at column 143, so the numeric block
# spanning columns 0-143 has a constant size.
# Each digit is a fixed 12 px wide; the decimal point is 6 px wide.
# The decimal point always sits at columns 100-106.
# Left of the decimal point, count down from column 100 and cut a block
# every 12 px; a leftmost block narrower than 12 px is discarded.
um = text[:,143:,:]
num_right = text[:,106:143,:]
dot = text[:,100:106,:]
num_left = text[:,:100,:]
plt.imshow(rgbImg(um))
plt.show()
right_length = 143-106
for i in xrange(0, (143-106)/12):
    plt.imshow(rgbImg(num_right[:,right_length-12:right_length,:]))
    plt.show()
    right_length = right_length - 12
plt.imshow(rgbImg(dot))
plt.show()
left_length = 100
for i in xrange(0, 100/12):
    plt.imshow(rgbImg(num_left[:,left_length-12:left_length,:]))
    plt.show()
    left_length = left_length - 12
# XYZ in dark black, 5 integer digits + 3 decimal digits
img = getImg('0201-S2-維修-殘膠-座標'.decode('utf8').encode('big5'))
plt.imshow(rgbImg(img))
plt.show()
texts = find_text_area(img)
for text in texts:
    plt.imshow(rgbImg(text))
    plt.show()
# NOTE(review): seg_num is called without a `size` argument here and in
# the runs below — verify seg_num provides a default for it.
for text in texts:
    seg_num(text)
# No title + XYZ in dark black
img = getImg('0070-S0-維修-殘膠-座標'.decode('utf8').encode('big5'))
plt.imshow(rgbImg(img))
plt.show()
texts = find_text_area(img)
for text in texts:
    plt.imshow(rgbImg(text))
    plt.show()
for text in texts:
    seg_num(text)
img = getImg('0045-S2-維修-殘膠-座標'.decode('utf8').encode('big5'))
plt.imshow(rgbImg(img))
plt.show()
texts = find_text_area(img)
for text in texts:
    plt.imshow(rgbImg(text))
    plt.show()
# No title, Z in dark black, Z non-negative
img = getImg('0064-S0-維修-殘膠-座標'.decode('utf8').encode('big5'))
plt.imshow(rgbImg(img))
plt.show()
texts = find_text_area(img)
for text in texts:
    plt.imshow(rgbImg(text))
    plt.show()
img = getImg('0064-S0-維修-殘膠-座標'.decode('utf8').encode('big5'))
plt.imshow(rgbImg(img))
plt.show()
texts = find_text_area(img)
for text in texts:
    seg_num(text)
img = getImg('0045-S2-維修-殘膠-座標'.decode('utf8').encode('big5'))
plt.imshow(rgbImg(img))
plt.show()
texts = find_text_area(img)
for text in texts:
    seg_num(text)
# Walk every data image and cut it into character slices
import os
loading_path = 'D:/AOI/OCR_data/'
saving_path = 'D:/AOI/OCR_segments/step0/'
count = 0
seg_list = []
for dirPath, dirNames, fileNames in os.walk(loading_path):
    for fname in fileNames:
        # Only the first file is processed in this experiment run
        # (count starts at 0 and stops the loop once it reaches 1).
        if count == 1:
            break
        else:
            count = count + 1
            src_path = os.path.join(dirPath, fname)
            img = getImg(src_path)
            #plt.imshow(img)
            #plt.show()
            texts = find_text_area(img)
            for i, text in enumerate(texts):
                seg_list = seg_num(text,(20,20))
                for index, seg in enumerate(seg_list):
                    # Saving slices to disk is disabled; display instead.
                    #path = saving_path + '{}_{}_{}.jpg'.format(fname[:-4], i, index)
                    #print path
                    #cv2.imwrite(path, seg)
                    plt.imshow(seg)
                    plt.show()
from numpy.linalg import norm
# Class labels: '0'-'9' are the digits; 11-14 map to special glyphs.
dic = {'11':'blank', '12':'dot', '13':'minus', '14':'um'}
prepare_list = ['0','1','2','3','4','5','6','7','8','9','11','12','13','14']
data_list = []
label_list = []
# One directory of labelled 20x20 glyph images per class.
for label_name in prepare_list:
    path = 'D:/AOI/OCR_segments/step1/{}/'.format(label_name)
    print path
    for dirPath, dirNames, fileNames in os.walk(path):
        for fname in fileNames:
            src_path = os.path.join(dirPath, fname)
            seg_img = getImg(src_path)
            res_img = resize(seg_img,(20,20))
            gray = grayImg(res_img)
            #plt.imshow(gray,'gray')
            #plt.show()
            # HOG-style descriptor: Sobel gradients -> magnitude/angle ->
            # a 16-bin orientation histogram per 10x10 quadrant,
            # magnitude-weighted via np.bincount.
            gx = cv2.Sobel(gray, cv2.CV_32F, 1, 0)
            gy = cv2.Sobel(gray, cv2.CV_32F, 0, 1)
            mag, ang = cv2.cartToPolar(gx, gy)
            bin_n = 16
            bin = np.int32(bin_n*ang/(2*np.pi))
            bin_cells = bin[:10,:10], bin[10:,:10], bin[:10,10:], bin[10:,10:]
            mag_cells = mag[:10,:10], mag[10:,:10], mag[:10,10:], mag[10:,10:]
            hists = [np.bincount(b.ravel(), m.ravel(), bin_n) for b, m in zip(bin_cells, mag_cells)]
            hist = np.hstack(hists)
            # Hellinger-style normalization: L1 -> sqrt -> L2.
            eps = 1e-7
            hist /= hist.sum() + eps
            hist = np.sqrt(hist)
            hist /= norm(hist) + eps
            data_list.append(hist)
            label_list.append(label_name)
trainingDataMat = np.array(data_list, np.float32)
labelsMat = np.array(label_list, np.int32)
print len(label_list)
# Spot-check one training sample.
i = 97
print trainingDataMat[i], labelsMat[i]
path = 'D:/AOI/OCR_segments/step2/test_data_01/'
count = 0
test_data = []
for dirPath, dirNames, fileNames in os.walk(path):
    for fname in fileNames:
        # count starts at 0 and only increments, so the break never
        # fires — every file in the directory is processed.
        if count == -1:
            break
        else:
            src_path = os.path.join(dirPath, fname)
            # Paths on this machine are Big5-encoded byte strings.
            print src_path.decode('big5')
            seg_img = getImg(src_path)
            res_img = resize(seg_img,(20,20))
            gray = grayImg(res_img)
            # Same HOG-style descriptor as used for the training data:
            # Sobel gradients -> 16-bin orientation histogram per
            # quadrant -> L1/sqrt/L2 normalization.
            gx = cv2.Sobel(gray, cv2.CV_32F, 1, 0)
            gy = cv2.Sobel(gray, cv2.CV_32F, 0, 1)
            mag, ang = cv2.cartToPolar(gx, gy)
            bin_n = 16
            bin = np.int32(bin_n*ang/(2*np.pi))
            bin_cells = bin[:10,:10], bin[10:,:10], bin[:10,10:], bin[10:,10:]
            mag_cells = mag[:10,:10], mag[10:,:10], mag[:10,10:], mag[10:,10:]
            hists = [np.bincount(b.ravel(), m.ravel(), bin_n) for b, m in zip(bin_cells, mag_cells)]
            hist = np.hstack(hists)
            eps = 1e-7
            hist /= hist.sum() + eps
            hist = np.sqrt(hist)
            hist /= norm(hist) + eps
            test_data.append(hist)
            count += 1
testDataMat = np.array(test_data, np.float32)
# Notebook-cell expression: displays the first feature vector.
test_data[0]
# Train a linear C-SVM on the HOG features and classify the test set.
svm = cv2.ml.SVM_create()
svm.setType(cv2.ml.SVM_C_SVC)
svm.setKernel(cv2.ml.SVM_LINEAR)
#svm.setTermCriteria((cv2.TERM_CRITERIA_COUNT, 100, 1.e-06))
svm.setC(1.0)
svm.train(trainingDataMat, cv2.ml.ROW_SAMPLE, labelsMat)
# predict returns (retval, results); results is an Nx1 float array of
# predicted class labels.
response = svm.predict(testDataMat)
for res in response[1]:
    result = None
    # Map special labels (11-14) back to their glyph names; plain digit
    # labels print as integers. (dict.has_key is Python 2 only.)
    if dic.has_key(str(int(res[0]))):
        result = dic[str(int(res[0]))]
    else:
        result = int(res[0])
    print result